<!DOCTYPE html>
<html lang="zh-CN">
<head>
  <meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=2">
<meta name="theme-color" content="#222">
<meta name="generator" content="Hexo 4.2.0">
  <link rel="apple-touch-icon" sizes="180x180" href="/images/apple-touch-icon-next.png">
  <link rel="icon" type="image/png" sizes="32x32" href="/images/favicon-32x32-next.png">
  <link rel="icon" type="image/png" sizes="16x16" href="/images/favicon-16x16-next.png">
  <link rel="mask-icon" href="/images/logo.svg" color="#222">

<link rel="stylesheet" href="/css/main.css">


<link rel="stylesheet" href="/lib/font-awesome/css/font-awesome.min.css">
  <link rel="stylesheet" href="//cdn.jsdelivr.net/gh/fancyapps/fancybox@3/dist/jquery.fancybox.min.css">
  <link rel="stylesheet" href="/lib/pace/pace-theme-minimal.min.css">
  <script src="/lib/pace/pace.min.js"></script>

<script id="hexo-configurations">
    // Global namespace object for the theme's scripts; reuses window.NexT if it already exists.
    var NexT = window.NexT || {};
    // Site-wide settings serialized into the page as a single object literal.
    // Other inline scripts on this page read it: CONFIG.hostname gates the
    // Google Analytics bootstrap, CONFIG.comments drives the comment-tab
    // persistence logic, and CONFIG.page is attached by a later script.
    // NOTE(review): this line looks generated from the theme configuration —
    // presumably it should be changed at the source, not edited here; confirm.
    var CONFIG = {"hostname":"lanqilu.github.io","root":"/","scheme":"Gemini","version":"7.8.0","exturl":false,"sidebar":{"position":"left","display":"post","padding":18,"offset":12,"onmobile":true},"copycode":{"enable":true,"show_result":true,"style":null},"back2top":{"enable":true,"sidebar":true,"scrollpercent":true},"bookmark":{"enable":false,"color":"#395ca3","save":"auto"},"fancybox":true,"mediumzoom":false,"lazyload":false,"pangu":true,"comments":{"style":"tabs","active":null,"storage":true,"lazyload":false,"nav":null},"algolia":{"hits":{"per_page":10},"labels":{"input_placeholder":"Search for Posts","hits_empty":"We didn't find any results for the search: ${query}","hits_stats":"${hits} results found in ${time} ms"}},"localsearch":{"enable":true,"trigger":"auto","top_n_per_article":1,"unescape":false,"preload":false},"motion":{"enable":true,"async":false,"transition":{"post_block":"fadeIn","post_header":"slideDownIn","post_body":"slideDownIn","coll_header":"slideLeftIn","sidebar":"slideUpIn"}},"path":"search.xml"};
  </script>

  <meta name="description" content="有人在碰到问题时,就想：“我知道,我可以用正则表达式。”现在，他就有了两个问题。">
<meta property="og:type" content="article">
<meta property="og:title" content="正则表达式">
<meta property="og:url" content="https://lanqilu.github.io/2020/03/27/Python/%E6%AD%A3%E5%88%99%E8%A1%A8%E8%BE%BE%E5%BC%8F/index.html">
<meta property="og:site_name" content="Halo">
<meta property="og:description" content="有人在碰到问题时,就想：“我知道,我可以用正则表达式。”现在，他就有了两个问题。">
<meta property="og:locale" content="zh_CN">
<meta property="og:image" content="http://img.whl123456.top/image/%E6%AD%A3%E5%88%99%E8%A1%A8%E8%BE%BE%E5%BC%8F.png">
<meta property="article:published_time" content="2020-03-27T14:37:28.000Z">
<meta property="article:modified_time" content="2020-03-28T11:41:50.164Z">
<meta property="article:author" content="Lanqilu">
<meta property="article:tag" content="Python">
<meta property="article:tag" content="正则表达式">
<meta name="twitter:card" content="summary">
<meta name="twitter:image" content="http://img.whl123456.top/image/%E6%AD%A3%E5%88%99%E8%A1%A8%E8%BE%BE%E5%BC%8F.png">

<link rel="canonical" href="https://lanqilu.github.io/2020/03/27/Python/%E6%AD%A3%E5%88%99%E8%A1%A8%E8%BE%BE%E5%BC%8F/">


<script id="page-configurations">
  // Per-page flags attached to the global CONFIG object defined earlier in <head>.
  // This page is a single post (isPost) rather than the home page (isHome).
  // Reference for the underlying page variables: https://hexo.io/docs/variables.html
  CONFIG.page = {
    sidebar: "",
    isHome : false,
    isPost : true,
    lang   : 'zh-CN'
  };
</script>

  <title>正则表达式 | Halo</title>
  
    <script async src="https://www.googletagmanager.com/gtag/js?id=UA-161500949-1"></script>
    <script data-pjax>
      // Google Analytics bootstrap. Only runs when the page is served from the
      // hostname recorded in CONFIG, so copies opened from another host
      // (e.g. a local preview) do not queue any tracking calls.
      if (CONFIG.hostname === location.hostname) {
        // Reuse an existing dataLayer queue if one is already present.
        window.dataLayer = window.dataLayer || [];
        // gtag() only enqueues its arguments object onto dataLayer.
        // NOTE(review): presumably the async gtag.js script loaded just above
        // consumes this queue — confirm.
        function gtag(){dataLayer.push(arguments);}
        gtag('js', new Date());
        gtag('config', 'UA-161500949-1');
      }
    </script>


  <script data-pjax>
    // Loader for the hm.js script hosted on hm.baidu.com.
    // _hmt is kept as a global array (reused if already defined).
    // NOTE(review): presumably hm.js reads _hmt as its command queue — confirm.
    var _hmt = _hmt || [];
    (function() {
      // Anchor on the first <script> in the document and insert the new
      // script element immediately before it.
      var firstScript = document.getElementsByTagName("script")[0];
      var tracker = document.createElement("script");
      tracker.src = "https://hm.baidu.com/hm.js?256151d1651e9d73ec980b2fc69de8f6";
      firstScript.parentNode.insertBefore(tracker, firstScript);
    })();
  </script>




  <noscript>
  <style>
  .use-motion .brand,
  .use-motion .menu-item,
  .sidebar-inner,
  .use-motion .post-block,
  .use-motion .pagination,
  .use-motion .comments,
  .use-motion .post-header,
  .use-motion .post-body,
  .use-motion .collection-header { opacity: initial; }

  .use-motion .site-title,
  .use-motion .site-subtitle {
    opacity: initial;
    top: initial;
  }

  .use-motion .logo-line-before i { left: initial; }
  .use-motion .logo-line-after i { right: initial; }
  </style>
</noscript>

</head>

<body itemscope itemtype="http://schema.org/WebPage">
  <div class="container use-motion">
    <div class="headband"></div>

    <header class="header" itemscope itemtype="http://schema.org/WPHeader">
      <div class="header-inner"><div class="site-brand-container">
  <div class="site-nav-toggle">
    <div class="toggle" aria-label="切换导航栏">
      <span class="toggle-line toggle-line-first"></span>
      <span class="toggle-line toggle-line-middle"></span>
      <span class="toggle-line toggle-line-last"></span>
    </div>
  </div>

  <div class="site-meta">

    <a href="/" class="brand" rel="start">
      <span class="logo-line-before"><i></i></span>
      <h1 class="site-title">Halo</h1>
      <span class="logo-line-after"><i></i></span>
    </a>
  </div>

  <div class="site-nav-right">
    <div class="toggle popup-trigger">
        <i class="fa fa-search fa-fw fa-lg"></i>
    </div>
  </div>
</div>




<nav class="site-nav">
  <ul id="menu" class="menu">
        <li class="menu-item menu-item-home">

    <a href="/" rel="section"><i class="fa fa-fw fa-home"></i>首页</a>

  </li>
        <li class="menu-item menu-item-tags">

    <a href="/tags/" rel="section"><i class="fa fa-fw fa-tags"></i>标签<span class="badge">39</span></a>

  </li>
        <li class="menu-item menu-item-categories">

    <a href="/categories/" rel="section"><i class="fa fa-fw fa-th"></i>分类<span class="badge">18</span></a>

  </li>
        <li class="menu-item menu-item-archives">

    <a href="/archives/" rel="section"><i class="fa fa-fw fa-archive"></i>归档<span class="badge">85</span></a>

  </li>
        <li class="menu-item menu-item-python">

    <a href="/categories/Python/" rel="section"><i class="fa fa-fw fa-code"></i>Python</a>

  </li>
        <li class="menu-item menu-item-java">

    <a href="/categories/Java/" rel="section"><i class="fa fa-fw fa-code"></i>Java</a>

  </li>
      <li class="menu-item menu-item-search">
        <a role="button" class="popup-trigger"><i class="fa fa-search fa-fw"></i>搜索
        </a>
      </li>
  </ul>
</nav>



  <div class="search-pop-overlay">
    <div class="popup search-popup">
        <div class="search-header">
  <span class="search-icon">
    <i class="fa fa-search"></i>
  </span>
  <div class="search-input-container">
    <input autocomplete="off" autocapitalize="off"
           placeholder="搜索..." spellcheck="false"
           type="search" class="search-input">
  </div>
  <span class="popup-btn-close">
    <i class="fa fa-times-circle"></i>
  </span>
</div>
<div id="search-result">
  <div id="no-result">
    <i class="fa fa-spinner fa-pulse fa-5x fa-fw"></i>
  </div>
</div>

    </div>
  </div>

</div>
    </header>

    
  <div class="reading-progress-bar"></div>


    <main class="main">
      <div class="main-inner">
        <div class="content-wrap">
          

          <div class="content post posts-expand">
            

    
  
  
  <article itemscope itemtype="http://schema.org/Article" class="post-block" lang="zh-CN">
    <link itemprop="mainEntityOfPage" href="https://lanqilu.github.io/2020/03/27/Python/%E6%AD%A3%E5%88%99%E8%A1%A8%E8%BE%BE%E5%BC%8F/">

    <span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
      <meta itemprop="image" content="http://img.whl123456.top/image/avatar.jpg">
      <meta itemprop="name" content="Lanqilu">
      <meta itemprop="description" content="">
    </span>

    <span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
      <meta itemprop="name" content="Halo">
    </span>
      <header class="post-header">
        <h1 class="post-title" itemprop="name headline">
          正则表达式
        </h1>

        <div class="post-meta">
            <span class="post-meta-item">
              <span class="post-meta-item-icon">
                <i class="fa fa-calendar-o"></i>
              </span>
              <span class="post-meta-item-text">发表于</span>

              <time title="创建时间：2020-03-27 22:37:28" itemprop="dateCreated datePublished" datetime="2020-03-27T22:37:28+08:00">2020-03-27</time>
            </span>
              <span class="post-meta-item">
                <span class="post-meta-item-icon">
                  <i class="fa fa-calendar-check-o"></i>
                </span>
                <span class="post-meta-item-text">更新于</span>
                <time title="修改时间：2020-03-28 19:41:50" itemprop="dateModified" datetime="2020-03-28T19:41:50+08:00">2020-03-28</time>
              </span>
            <span class="post-meta-item">
              <span class="post-meta-item-icon">
                <i class="fa fa-folder-o"></i>
              </span>
              <span class="post-meta-item-text">分类于</span>
                <span itemprop="about" itemscope itemtype="http://schema.org/Thing">
                  <a href="/categories/Python/" itemprop="url" rel="index"><span itemprop="name">Python</span></a>
                </span>
            </span>

          

        </div>
      </header>

    
    
    
    <div class="post-body" itemprop="articleBody">

      
        <div class="note info no-icon">
            <p>有人在碰到问题时,就想：“我知道,我可以用正则表达式。”现在，他就有了两个问题。</p>
          </div>

<a id="more"></a>

<hr>
<p><img src="http://img.whl123456.top/image/%E6%AD%A3%E5%88%99%E8%A1%A8%E8%BE%BE%E5%BC%8F.png" alt="正则表达式"></p>
<p>正则表达式是一个特殊的字符序列，它能帮助你方便的检查一个字符串是否与某种模式匹配。为高级的文本模式匹配、抽取、与/或文本形式的搜索和替换提供了基础。</p>
<p>re 模块使 Python 语言拥有全部的正则表达式功能。</p>
<ul>
<li>搜索，在字符串任意部分中搜索匹配的模式。<code>search()</code></li>
<li>匹配，是判断一个字符串能否从起始处全部或者部分地匹配某个模式。<code>match()</code></li>
</ul>
<p><code>compile</code>函数根据一个模式字符串和可选的标志参数生成一个正则表达式对象。<br>该对象拥有一系列方法用于正则表达式匹配和替换。</p>
<p>re 模块也提供了与这些方法功能完全一致的函数，这些函数使用一个模式字符串做为它们的第一个参数。</p>
<h2 id="特殊符号和字符"><a href="#特殊符号和字符" class="headerlink" title="特殊符号和字符"></a>特殊符号和字符</h2><center>常见正则表达式符号和特殊字符</center>


<table>
<thead>
<tr>
<th align="left">表示法</th>
<th align="left">描述</th>
</tr>
</thead>
<tbody><tr>
<td align="left"><em>literal</em></td>
<td align="left">匹配文本字符串的字面值<em>literal</em></td>
</tr>
<tr>
<td align="left"><code>re1|re2</code></td>
<td align="left">匹配正则表达式 re1 或者 re2</td>
</tr>
<tr>
<td align="left"><code>.</code></td>
<td align="left">匹配任何字符（除了<code>\n</code>之外）</td>
</tr>
<tr>
<td align="left"><code>^</code></td>
<td align="left">匹配字符串起始部分</td>
</tr>
<tr>
<td align="left"><code>$</code></td>
<td align="left">匹配字符串终止部分</td>
</tr>
<tr>
<td align="left"><code>*</code></td>
<td align="left">匹配 0 次或者多次前面出现的正则表达式</td>
</tr>
<tr>
<td align="left"><code>+</code></td>
<td align="left">匹配 1 次或者多次前面出现的正则表达式</td>
</tr>
<tr>
<td align="left"><code>?</code></td>
<td align="left">匹配 0 次或者 1 次前面出现的正则表达式；紧跟在其他量词之后时表示<strong>非贪婪</strong>匹配</td>
</tr>
<tr>
<td align="left"><code>{</code>N<code>}</code></td>
<td align="left">匹配 N 次前面出现的正则表达式</td>
</tr>
<tr>
<td align="left"><code>{</code>M<code>,</code>N<code>}</code></td>
<td align="left">匹配 M～N 次前面出现的正则表达式</td>
</tr>
<tr>
<td align="left"><code>[</code>…<code>]</code></td>
<td align="left">匹配来自字符集的任意单一字符</td>
</tr>
<tr>
<td align="left"><code>[</code>..x−y..<code>]</code></td>
<td align="left">匹配 x ～ y 范围中的任意单一字符</td>
</tr>
<tr>
<td align="left"><code>[^</code>…<code>]</code></td>
<td align="left">不匹配此字符集中出现的任何一个字符，包括某一范围的字符</td>
</tr>
<tr>
<td align="left"><code>(</code>…<code>)</code></td>
<td align="left">匹配封闭的正则表达式，然后另存为子组</td>
</tr>
<tr>
<td align="left"><code>\d</code></td>
<td align="left">匹配任何十进制数字，与<code>[0-9]</code>一致(<code>\D</code>与之相反)</td>
</tr>
<tr>
<td align="left"><code>\b</code></td>
<td align="left">匹配任何单词边界，指单词和空格间的位置（<code>\B</code>与之相反）</td>
</tr>
<tr>
<td align="left"><code>\w</code></td>
<td align="left">匹配任何字母数字字符，与<code>[A-Za-z0-9_]</code>相同(<code>\W</code>与之相反)</td>
</tr>
<tr>
<td align="left"><code>\s</code></td>
<td align="left">匹配任何空白字符，与<code>[ \n\t\r\v\f]</code>相同（<code>\S</code>与之相反）</td>
</tr>
<tr>
<td align="left"><code>\N</code></td>
<td align="left">匹配已保存的子组 N（参见上面的<code>(</code>…<code>)</code>)</td>
</tr>
<tr>
<td align="left"><code>\</code><em>c</em></td>
<td align="left">逐字匹配任何特殊字符<em>c</em>(即仅按照字面意义匹配，不匹配特殊含义)</td>
</tr>
<tr>
<td align="left"><code>\A(\Z)</code></td>
<td align="left">匹配字符串的起始（结束）（另见上面介绍的<code>^</code>和<code>$</code>）</td>
</tr>
<tr>
<td align="left"><code>(?</code>iLmsux<code>)</code></td>
<td align="left">嵌入一个或者多个特殊“标记”参数（或者通过函数/方法）</td>
</tr>
<tr>
<td align="left"><code>(?:</code>…<code>)</code></td>
<td align="left">表示一个匹配不用保存的分组</td>
</tr>
<tr>
<td align="left"><code>(?P</code>&lt;<em>name</em>&gt;…<code>)</code></td>
<td align="left">像一个仅由name标识而不是数字 ID 标识的正则分组匹配</td>
</tr>
<tr>
<td align="left"><code>(?P=</code><em>name</em><code>)</code></td>
<td align="left">在同一字符串中匹配由<code>(?P&lt;name&gt;…)</code>分组的之前文本</td>
</tr>
<tr>
<td align="left"><code>(?#</code>…<code>)</code></td>
<td align="left">表示注释，所有内容都被忽略</td>
</tr>
<tr>
<td align="left"><code>(?=</code>…<code>)</code></td>
<td align="left">匹配条件是如果…出现在之后的位置，而不使用输入字符串；称作正向前视断言</td>
</tr>
<tr>
<td align="left"><code>(?!</code>…<code>)</code></td>
<td align="left">匹配条件是如果…不出现在之后的位置，而不使用输入字符串；称作负向前视断言</td>
</tr>
<tr>
<td align="left"><code>(?&lt;=</code>…<code>)</code></td>
<td align="left">匹配条件是如果…出现在之前的位置，而不使用输入字符串；称作正向后视断言</td>
</tr>
<tr>
<td align="left"><code>(?&lt;!</code>…<code>)</code></td>
<td align="left">匹配条件是如果…不出现在之前的位置，而不使用输入字符串；称作负向后视断言</td>
</tr>
</tbody></table>
<h2 id="常用正则表达式"><a href="#常用正则表达式" class="headerlink" title="常用正则表达式"></a>常用正则表达式</h2><table>
<thead>
<tr>
<th align="center">用处</th>
<th>值</th>
</tr>
</thead>
<tbody><tr>
<td align="center">用户名</td>
<td><code>/^[a-z0-9_-]{3,16}$/</code></td>
</tr>
<tr>
<td align="center">密码</td>
<td><code>/^[a-z0-9_-]{6,18}$/</code></td>
</tr>
<tr>
<td align="center">十六进制值</td>
<td><code>/^#?([a-f0-9]{6}|[a-f0-9]{3})$/</code></td>
</tr>
<tr>
<td align="center">电子邮箱</td>
<td><code>/^([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})$/</code><br><code>/^[a-z\d]+(\.[a-z\d]+)*@([\da-z](-[\da-z])?)+(\.{1,2}[a-z]+)+$/</code></td>
</tr>
<tr>
<td align="center">URL</td>
<td><code>/^(https?:\/\/)?([\da-z\.-]+)\.([a-z\.]{2,6})([\/\w \.-]*)*\/?$/</code></td>
</tr>
<tr>
<td align="center">IP 地址</td>
<td><code>/((2[0-4]\d|25[0-5]|[01]?\d\d?)\.){3}(2[0-4]\d|25[0-5]|[01]?\d\d?)/</code><br><code>/^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/</code></td>
</tr>
<tr>
<td align="center">HTML 标签</td>
<td><code>/^&lt;([a-z]+)([^&lt;]+)*(?:&gt;(.*)&lt;\/\1&gt;|\s+\/&gt;)$/</code></td>
</tr>
<tr>
<td align="center">删除代码 <code>//</code> 注释</td>
<td><code>(?&lt;!http:|\S)//.*$</code></td>
</tr>
<tr>
<td align="center">Unicode编码中的汉字范围</td>
<td><code>/^[\u2E80-\u9FFF]+$/</code></td>
</tr>
</tbody></table>
<h2 id="re-search"><a href="#re-search" class="headerlink" title="re.search()"></a><code>re.search()</code></h2><p><code>re.search</code> 扫描整个字符串并返回第一个成功的匹配。</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><span class="line">print(re.search(<span class="string">'www'</span>, <span class="string">'www.runoob.com'</span>).span())</span><br><span class="line">print(re.search(<span class="string">'com'</span>, <span class="string">'www.runoob.com'</span>).span())</span><br><span class="line">print(re.search(<span class="string">'com'</span>, <span class="string">'www.runoob.com.com'</span>).span())</span><br></pre></td></tr></table></figure>

<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><span class="line">(<span class="number">0</span>, <span class="number">3</span>)</span><br><span class="line">(<span class="number">11</span>, <span class="number">14</span>)</span><br><span class="line">(<span class="number">11</span>, <span class="number">14</span>)</span><br></pre></td></tr></table></figure>

<h2 id="re-match"><a href="#re-match" class="headerlink" title="re.match()"></a><code>re.match()</code></h2><p><code>re.match</code>尝试从字符串的起始位置匹配一个模式；如果在起始位置匹配不成功，<code>match()</code>就返回 <code>None</code>。</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">import</span> re</span><br><span class="line">print(re.match(<span class="string">'www'</span>, <span class="string">'www.runoob.com'</span>).span())  <span class="comment"># 在起始位置匹配</span></span><br><span class="line">print(re.match(<span class="string">'com'</span>, <span class="string">'www.runoob.com'</span>))         <span class="comment"># 不在起始位置匹配</span></span><br></pre></td></tr></table></figure>

<pre><code>(0, 3)
None</code></pre><h2 id="group-和groups"><a href="#group-和groups" class="headerlink" title="group()和groups()"></a><code>group()</code>和<code>groups()</code></h2><p><code>group(num)</code>或<code>groups()</code>匹配对象函数来获取匹配表达式。</p>
<p><code>group(num=0)</code>匹配的整个表达式的字符串，<code>group()</code>可以一次输入多个组号，在这种情况下它将返回一个包含那些组所对应值的元组。</p>
<p><code>groups()</code>返回一个包含所有小组字符串的元组，从 1 到 所含的小组号。</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br></pre></td><td class="code"><pre><span class="line">line = <span class="string">"Cats are smarter than dogs"</span></span><br><span class="line"><span class="comment"># .* 表示任意匹配除换行符（\n、\r）之外的任何单个或多个字符</span></span><br><span class="line">matchObj = re.match( <span class="string">r'(.*) are (.*?) .*'</span>, line, re.M|re.I)</span><br><span class="line"> </span><br><span class="line"><span class="keyword">if</span> matchObj:</span><br><span class="line">   <span class="keyword">print</span> (<span class="string">"matchObj.group() : "</span>, matchObj.group())</span><br><span class="line">   <span class="keyword">print</span> (<span class="string">"matchObj.group(1) : "</span>, matchObj.group(<span class="number">1</span>))</span><br><span class="line">   <span class="keyword">print</span> (<span class="string">"matchObj.group(2) : "</span>, matchObj.group(<span class="number">2</span>))</span><br><span class="line">   <span class="keyword">print</span> (<span class="string">"matchObj.groups() :"</span>,matchObj.groups())</span><br><span class="line"><span class="keyword">else</span>:</span><br><span class="line">   <span class="keyword">print</span> (<span class="string">"No match!!"</span>)</span><br></pre></td></tr></table></figure>

<pre><code>matchObj.group() :  Cats are smarter than dogs
matchObj.group(1) :  Cats
matchObj.group(2) :  smarter
matchObj.groups() : (&apos;Cats&apos;, &apos;smarter&apos;)</code></pre><h2 id="re-sub"><a href="#re-sub" class="headerlink" title="re.sub()"></a><code>re.sub()</code></h2><p><code>re.sub</code>用于替换字符串中的匹配项。</p>
<p><code>re.sub(pattern, repl, string, count=0, flags=0)</code></p>
<ul>
<li><code>pattern</code> : 正则中的模式字符串。</li>
<li><code>repl</code> : 替换的字符串，也可为一个函数。</li>
<li><code>string</code> : 要被查找替换的原始字符串。</li>
<li><code>count</code> : 模式匹配后替换的最大次数，默认 0 表示替换所有的匹配。</li>
<li><code>flags</code> : 编译时用的匹配模式，数字形式。</li>
</ul>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br></pre></td><td class="code"><pre><span class="line">phone = <span class="string">"2004-959-559 # 这是一个电话号码"</span></span><br><span class="line"> </span><br><span class="line"><span class="comment"># 删除注释</span></span><br><span class="line">num = re.sub(<span class="string">r'#.*$'</span>, <span class="string">""</span>, phone)</span><br><span class="line"><span class="keyword">print</span> (<span class="string">"电话号码 : "</span>, num)  <span class="comment"># 电话号码 :  2004-959-559 </span></span><br><span class="line"> </span><br><span class="line"><span class="comment"># 移除非数字的内容</span></span><br><span class="line">num = re.sub(<span class="string">r'\D'</span>, <span class="string">""</span>, phone)</span><br><span class="line"><span class="keyword">print</span> (<span class="string">"电话号码 : "</span>, num)  <span class="comment"># 电话号码 :  2004959559</span></span><br></pre></td></tr></table></figure>

<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 将匹配的数字乘以 2</span></span><br><span class="line"><span class="function"><span class="keyword">def</span> <span class="title">double</span><span class="params">(matched)</span>:</span></span><br><span class="line">    value = int(matched.group(<span class="string">'value'</span>))</span><br><span class="line">    <span class="keyword">return</span> str(value * <span class="number">2</span>)</span><br><span class="line"> </span><br><span class="line">s = <span class="string">'A23G4HFD567'</span></span><br><span class="line">print(re.sub(<span class="string">r'(?P&lt;value&gt;\d+)'</span>, double, s))  <span class="comment"># A46G8HFD1134</span></span><br></pre></td></tr></table></figure>

<h2 id="re-compile"><a href="#re-compile" class="headerlink" title="re.compile"></a><code>re.compile</code></h2><p><code>compile</code>函数用于编译正则表达式,生成一个正则表达式(Pattern)对象,供<code>match()</code>和<code>search()</code>这两个函数使用。<br><code>re.compile(pattern[, flags])</code></p>
<ul>
<li><code>pattern</code> : 一个字符串形式的正则表达式</li>
<li><code>flags</code> 可选，表示匹配模式，比如忽略大小写，多行模式等，具体参数为：<ul>
<li><code>re.I</code> 忽略大小写</li>
<li><code>re.L</code> 表示特殊字符集 \w, \W, \b, \B, \s, \S 依赖于当前环境</li>
<li><code>re.M</code> 多行模式</li>
<li><code>re.S</code> 使 <code>.</code> 匹配包括换行符在内的任意字符（默认情况下 <code>.</code> 不匹配换行符）</li>
<li><code>re.U</code>表示特殊字符集 \w, \W, \b, \B, \d, \D, \s, \S 依赖于 Unicode 字符属性数据库</li>
<li><code>re.X</code> 为了增加可读性，忽略空格和’ # ‘后面的注释</li>
</ul>
</li>
</ul>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br></pre></td><td class="code"><pre><span class="line">pattern = re.compile(<span class="string">r'\d+'</span>)                    <span class="comment"># 用于匹配至少一个数字</span></span><br><span class="line">pattern = re.compile(<span class="string">r'\d+'</span>)                    <span class="comment"># 用于匹配至少一个数字</span></span><br><span class="line">m = pattern.match(<span class="string">'one12twothree34four'</span>)        <span class="comment"># 查找头部，没有匹配</span></span><br><span class="line">print(<span class="string">"从头部开始匹配"</span>,m)  <span class="comment"># 从头部开始匹配 None</span></span><br><span class="line">m = pattern.match(<span class="string">'one12twothree34four'</span>, <span class="number">3</span>, <span class="number">10</span>) <span class="comment"># 从'1'的位置开始匹配，正好匹配</span></span><br><span class="line">print(m)  <span class="comment"># &lt;re.Match object; span=(3, 5), match='12'&gt;     # 返回一个 Match 对象</span></span><br><span class="line">print(m.group())  <span class="comment"># 12</span></span><br></pre></td></tr></table></figure>

<h2 id="re-findall"><a href="#re-findall" class="headerlink" title="re.findall()"></a><code>re.findall()</code></h2><p>在字符串中找到正则表达式所匹配的所有子串，并返回一个列表，如果没有找到匹配的，则返回空列表。</p>
<p><code>pattern.findall(string[, pos[, endpos]])</code>（<code>pattern</code> 为 <code>re.compile()</code> 返回的正则表达式对象）</p>
<ul>
<li><code>string</code> 待匹配的字符串。</li>
<li><code>pos</code> 可选参数，指定字符串的起始位置，默认为 0。</li>
<li><code>endpos</code> 可选参数，指定字符串的结束位置，默认为字符串的长度。</li>
</ul>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br></pre></td><td class="code"><pre><span class="line">pattern = re.compile(<span class="string">r'\d+'</span>)   <span class="comment"># 查找数字</span></span><br><span class="line">result1 = pattern.findall(<span class="string">'runoob 123 google 456'</span>)</span><br><span class="line">result2 = pattern.findall(<span class="string">'run88oob123google456'</span>, <span class="number">0</span>, <span class="number">10</span>)</span><br><span class="line"> </span><br><span class="line">print(result1)  <span class="comment"># ['123', '456']</span></span><br><span class="line">print(result2)  <span class="comment"># ['88', '12']</span></span><br></pre></td></tr></table></figure>

<h2 id="re-finditer"><a href="#re-finditer" class="headerlink" title="re.finditer()"></a><code>re.finditer()</code></h2><p>和 <code>findall</code> 类似，在字符串中找到正则表达式所匹配的所有子串，并把它们作为一个迭代器返回。</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><span class="line">it = re.finditer(<span class="string">r"\d+"</span>,<span class="string">"12a32bc43jf3"</span>) </span><br><span class="line"><span class="keyword">for</span> match <span class="keyword">in</span> it: </span><br><span class="line">    <span class="keyword">print</span> (match.group() ) <span class="comment">## 12 32 43 3</span></span><br></pre></td></tr></table></figure>

<h2 id="re-split"><a href="#re-split" class="headerlink" title="re.split"></a><code>re.split</code></h2><p><code>split</code>方法按照能够匹配的子串将字符串分割后返回列表，它的使用形式如下：</p>
<p><code>re.split(pattern, string[, maxsplit=0, flags=0])</code></p>
<ul>
<li><code>pattern</code>匹配的正则表达式</li>
<li><code>string</code>要匹配的字符串。</li>
<li><code>maxsplit</code>分隔次数，<code>maxsplit=1</code>分隔一次，默认为 0，不限制次数。</li>
<li><code>flags</code>标志位，用于控制正则表达式的匹配方式，如：是否区分大小写，多行匹配等等</li>
</ul>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><span class="line">re.split(<span class="string">r'\W+'</span>, <span class="string">'runoob, runoob, runoob.'</span>)  <span class="comment"># ['runoob', 'runoob', 'runoob', '']</span></span><br></pre></td></tr></table></figure>

<p>当匹配成功时返回一个 Match 对象，其中：</p>
<ul>
<li><code>group([group1, …])</code> 方法用于获得一个或多个分组匹配的字符串，当要获得整个匹配的子串时，可直接使用 <code>group()</code>或 <code>group(0)</code>；</li>
<li><code>start([group])</code> 方法用于获取分组匹配的子串在整个字符串中的起始位置（子串第一个字符的索引），参数默认值为 0；</li>
<li><code>end([group])</code> 方法用于获取分组匹配的子串在整个字符串中的结束位置（子串最后一个字符的索引+1），参数默认值为 0；</li>
<li><code>span([group])</code>方法返回<code>(start(group), end(group))</code>。</li>
</ul>

    </div>

    
    
    
      
  <div class="popular-posts-header">相关文章</div>
  <ul class="popular-posts">
    <li class="popular-posts-item">
      <div class="popular-posts-title"><a href="/2020/03/23/Java/Java书单/" rel="bookmark">计算机书单</a></div>
    </li>
    <li class="popular-posts-item">
      <div class="popular-posts-title"><a href="/2020/03/24/Python/Python网络数据采集/" rel="bookmark">Python网络数据采集</a></div>
    </li>
    <li class="popular-posts-item">
      <div class="popular-posts-title"><a href="/2020/04/06/Python/Django/" rel="bookmark">Django</a></div>
    </li>
    <li class="popular-posts-item">
      <div class="popular-posts-title"><a href="/2019/10/01/Python/函数式编程/" rel="bookmark">函数式编程</a></div>
    </li>
    <li class="popular-posts-item">
      <div class="popular-posts-title"><a href="/2019/10/18/Python/面向对象/" rel="bookmark">面向对象</a></div>
    </li>
  </ul>


      <footer class="post-footer">
          
          <div class="post-tags">
              <a href="/tags/Python/" rel="tag"><i class="fa fa-tag"></i> Python</a>
              <a href="/tags/%E6%AD%A3%E5%88%99%E8%A1%A8%E8%BE%BE%E5%BC%8F/" rel="tag"><i class="fa fa-tag"></i> 正则表达式</a>
          </div>

        


        
    <div class="post-nav">
      <div class="post-nav-item">
    <a href="/2020/03/26/%E4%B8%AD%E5%9B%BD%E8%BF%91%E4%BB%A3%E5%8F%B2/zgjds_1.0_%E7%BB%BC%E8%BF%B0%E9%A3%8E%E4%BA%91%E5%8F%98%E5%B9%BB%E7%9A%84%E5%85%AB%E5%8D%81%E5%B9%B4/" rel="prev" title="综述 风云变幻的八十年">
      <i class="fa fa-chevron-left"></i> 综述 风云变幻的八十年
    </a></div>
      <div class="post-nav-item">
    <a href="/2020/03/29/%E4%B8%AD%E5%9B%BD%E8%BF%91%E4%BB%A3%E5%8F%B2/zgjds_1.1_%E8%B5%84%E6%9C%AC-%E5%B8%9D%E5%9B%BD%E4%B8%BB%E4%B9%89%E5%AF%B9%E4%B8%AD%E5%9B%BD%E7%9A%84%E4%BE%B5%E7%95%A5/" rel="next" title="资本-帝国主义对中国的侵略">
      资本-帝国主义对中国的侵略 <i class="fa fa-chevron-right"></i>
    </a></div>
    </div>
      </footer>
    
  </article>
  
  
  



          </div>
          

<script>
  // When the 'tabs:register' event fires, activate the preferred comment tab.
  // The preference is the value stored under 'comments_active' in
  // localStorage (only consulted when CONFIG.comments.storage is truthy),
  // falling back to CONFIG.comments.activeClass.
  // NOTE(review): CONFIG.comments as emitted on this page has an `active` key
  // but no `activeClass`, so the fallback is undefined here — confirm whether
  // the theme sets activeClass elsewhere.
  window.addEventListener('tabs:register', () => {
    const commentsConfig = CONFIG.comments;
    const remembered = commentsConfig.storage
      ? localStorage.getItem('comments_active')
      : null;
    const preferred = remembered || commentsConfig.activeClass;
    if (!preferred) return;

    const tabLink = document.querySelector(`a[href="#comment-${preferred}"]`);
    if (tabLink) tabLink.click();
  });

  // When storage is enabled, remember the comment pane that received a
  // 'tabs:click' event by saving its second class name for the next visit.
  if (CONFIG.comments.storage) {
    window.addEventListener('tabs:click', evt => {
      const pane = evt.target;
      if (pane.matches('.tabs-comment .tab-content .tab-pane')) {
        localStorage.setItem('comments_active', pane.classList[1]);
      }
    });
  }
</script>

        </div>
          
  
  <div class="toggle sidebar-toggle">
    <span class="toggle-line toggle-line-first"></span>
    <span class="toggle-line toggle-line-middle"></span>
    <span class="toggle-line toggle-line-last"></span>
  </div>

  <!-- Sidebar: panel labels, table-of-contents panel, site-overview panel and back-to-top control. -->
  <aside class="sidebar">
    <div class="sidebar-inner">

      <!-- Labels for the two sidebar panels below (文章目录 = table of contents, 站点概览 = site overview). -->
      <ul class="sidebar-nav motion-element">
        <li class="sidebar-nav-toc">
          文章目录
        </li>
        <li class="sidebar-nav-overview">
          站点概览
        </li>
      </ul>

      <!-- Table of contents for the current post. '.post-toc-wrap' is listed in the Pjax selectors near the end of this page. -->
      <!--noindex-->
      <div class="post-toc-wrap sidebar-panel">
          <div class="post-toc motion-element"><ol class="nav"><li class="nav-item nav-level-2"><a class="nav-link" href="#特殊符号和字符"><span class="nav-number">1.</span> <span class="nav-text">特殊符号和字符</span></a></li><li class="nav-item nav-level-2"><a class="nav-link" href="#常用正则表达式"><span class="nav-number">2.</span> <span class="nav-text">常用正则表达式</span></a></li><li class="nav-item nav-level-2"><a class="nav-link" href="#re-search"><span class="nav-number">3.</span> <span class="nav-text">re.search()</span></a></li><li class="nav-item nav-level-2"><a class="nav-link" href="#re-match"><span class="nav-number">4.</span> <span class="nav-text">re.match()</span></a></li><li class="nav-item nav-level-2"><a class="nav-link" href="#group-和groups"><span class="nav-number">5.</span> <span class="nav-text">group()和groups()</span></a></li><li class="nav-item nav-level-2"><a class="nav-link" href="#re-sub"><span class="nav-number">6.</span> <span class="nav-text">re.sub()</span></a></li><li class="nav-item nav-level-2"><a class="nav-link" href="#re-compile"><span class="nav-number">7.</span> <span class="nav-text">re.compile</span></a></li><li class="nav-item nav-level-2"><a class="nav-link" href="#re-findall"><span class="nav-number">8.</span> <span class="nav-text">re.findall()</span></a></li><li class="nav-item nav-level-2"><a class="nav-link" href="#re-finditer"><span class="nav-number">9.</span> <span class="nav-text">re.finditer()</span></a></li><li class="nav-item nav-level-2"><a class="nav-link" href="#re-split"><span class="nav-number">10.</span> <span class="nav-text">re.split</span></a></li></ol></div>
      </div>
      <!--/noindex-->

      <!-- Site overview: author card, post/category/tag counters and contact links. -->
      <div class="site-overview-wrap sidebar-panel">
        <div class="site-author motion-element" itemprop="author" itemscope itemtype="http://schema.org/Person">
          <!-- NOTE(review): the avatar is requested over plain http; confirm the image host serves https before switching the URL. -->
          <img class="site-author-image" itemprop="image" alt="Lanqilu"
            src="http://img.whl123456.top/image/avatar.jpg">
          <p class="site-author-name" itemprop="name">Lanqilu</p>
          <div class="site-description" itemprop="description"></div>
        </div>
        <div class="site-state-wrap motion-element">
          <nav class="site-state">
            <div class="site-state-item site-state-posts">
              <a href="/archives/">
                <span class="site-state-item-count">85</span>
                <span class="site-state-item-name">日志</span>
              </a>
            </div>
            <div class="site-state-item site-state-categories">
              <a href="/categories/">
                <span class="site-state-item-count">18</span>
                <span class="site-state-item-name">分类</span></a>
            </div>
            <div class="site-state-item site-state-tags">
              <a href="/tags/">
                <span class="site-state-item-count">39</span>
                <span class="site-state-item-name">标签</span></a>
            </div>
          </nav>
        </div>
        <!-- Icon-only links: aria-label supplies the accessible name, and the glyph elements are hidden from assistive technology. -->
        <div class="links-of-author motion-element">
          <span class="links-of-author-item">
            <a href="https://github.com/lanqilu" title="GitHub → https:&#x2F;&#x2F;github.com&#x2F;lanqilu" aria-label="GitHub" rel="noopener" target="_blank"><i class="fa fa-fw fa-github" aria-hidden="true"></i></a>
          </span>
          <span class="links-of-author-item">
            <a href="http://mail.qq.com/cgi-bin/qm_share?t=qm_mailme&amp;email=zqKvoL_noruOqKG2o6_nouCtoaM" title="E-Mail → http:&#x2F;&#x2F;mail.qq.com&#x2F;cgi-bin&#x2F;qm_share?t&#x3D;qm_mailme&amp;email&#x3D;zqKvoL_noruOqKG2o6_nouCtoaM" aria-label="E-Mail" rel="noopener" target="_blank"><i class="fa fa-fw fa-envelope" aria-hidden="true"></i></a>
          </span>
        </div>

      </div>
        <div class="back-to-top motion-element">
          <i class="fa fa-arrow-up" aria-hidden="true"></i>
          <span>0%</span>
        </div>

    </div>
  </aside>
  <div id="sidebar-dimmer"></div>


      </div>
    </main>

    <!-- Site footer: copyright line and "powered by" credits. -->
    <footer class="footer">
      <div class="footer-inner">

        <div class="copyright">
          &copy; 2019 – 
          <span itemprop="copyrightYear">2020</span>
          <span class="with-love">
            <!-- Decorative glyph only, so it is hidden from assistive technology. -->
            <i class="fa fa-heart" aria-hidden="true"></i>
          </span>
          <span class="author" itemprop="copyrightHolder">Lanqilu</span>
        </div>
        <div class="powered-by">由 <a href="https://hexo.io/" class="theme-link" rel="noopener" target="_blank">Hexo</a> &amp; <a href="https://theme-next.org/" class="theme-link" rel="noopener" target="_blank">NexT.Gemini</a> 强力驱动
        </div>

      </div>
    </footer>
  </div>

  
  <!--
    Library and theme scripts. None of them carries async/defer, so they run in
    document order before the inline Pjax bootstrap that follows.
    CDN URLs use an explicit https: scheme so they resolve the same way however
    the page itself was loaded (a protocol-relative URL inherits http: or file:).
  -->
  <script src="/lib/anime.min.js"></script>
  <script src="/lib/pjax/pjax.min.js"></script>
  <script src="https://cdn.jsdelivr.net/npm/jquery@3/dist/jquery.min.js"></script>
  <script src="https://cdn.jsdelivr.net/gh/fancyapps/fancybox@3/dist/jquery.fancybox.min.js"></script>
  <script src="https://cdn.jsdelivr.net/npm/pangu@4/dist/browser/pangu.min.js"></script>
  <script src="/lib/velocity/velocity.min.js"></script>
  <script src="/lib/velocity/velocity.ui.min.js"></script>

  <script src="/js/utils.js"></script>

  <script src="/js/motion.js"></script>

  <script src="/js/schemes/pisces.js"></script>

  <script src="/js/next-boot.js"></script>

  <script>
// Create the page's Pjax instance. The Pjax constructor is expected to come
// from /lib/pjax/pjax.min.js, which is included earlier in this document.
var pjax = new Pjax({
  // CSS selectors handed to Pjax (see the Pjax documentation for how matching
  // elements are swapped between the old and the new page).
  selectors: [
    'head title',
    '#page-configurations',
    '.content-wrap',
    '.post-toc-wrap',
    '.languages',
    '#pjax'
  ],
  // Use Pjax's built-in innerHTML switch for the table-of-contents wrapper.
  switches: {
    '.post-toc-wrap': Pjax.switches.innerHTML
  },
  analytics: false,
  cacheBust: false,
  // Negation of the bookmark setting; CONFIG.bookmark.enable is false in this
  // page's configuration, so this evaluates to true here.
  scrollTo : !CONFIG.bookmark.enable
});

// After each successful Pjax navigation: rebuild the page's dynamic <script>
// elements, then refresh the theme (boot hooks, motion sequence, sidebar).
window.addEventListener('pjax:success', () => {
  const staleScripts = document.querySelectorAll('script[data-pjax], script#page-configurations, #pjax script');

  for (const stale of staleScripts) {
    const source = stale.text || stale.textContent || stale.innerHTML || '';
    const host = stale.parentNode;
    host.removeChild(stale);

    // Build a brand-new <script> carrying the same id, class, type, src,
    // data-pjax marker and inline code, and append it to the same parent
    // (presumably so that the browser executes it again).
    const fresh = document.createElement('script');
    for (const prop of ['id', 'className', 'type']) {
      if (stale[prop]) {
        fresh[prop] = stale[prop];
      }
    }
    if (stale.src) {
      fresh.src = stale.src;
      // Force synchronous loading of peripheral JS.
      fresh.async = false;
    }
    if (stale.hasAttribute('data-pjax')) {
      fresh.setAttribute('data-pjax', '');
    }
    if (source !== '') {
      fresh.appendChild(document.createTextNode(source));
    }
    host.appendChild(fresh);
  }

  NexT.boot.refresh();

  // Define Motion Sequence & Bootstrap Motion.
  if (CONFIG.motion.enable) {
    const sequence = NexT.motion.integrator.init();
    sequence.add(NexT.motion.middleWares.subMenu);
    sequence.add(NexT.motion.middleWares.postList);
    sequence.bootstrap();
  }

  NexT.utils.updateSidebarPosition();
});
</script>




  




  
<!-- Theme script for local search. In this page's CONFIG, localsearch.enable is true and path is "search.xml". -->
<script src="/js/local-search.js"></script>













    <!-- Container matched by the '#pjax' entry in the Pjax selectors; scripts inside it are re-created by the inline pjax:success handler. Empty on this page. -->
    <div id="pjax">
  

  

  

    </div>
</body>
</html>
